Validating Bias-Correction of CMIP6 Data

Bias corrected timelines for the different bias corrected variables will be compared against the reference observations (OISSTv2, SODA) to check how accurately they represent observed conditions, and how that changes across areas of interest.

Load Data

Target Areas

# Getting Timeseries and Shapefile Locations via gmRi
# returns path to oisst timeseries & path to shapefile:
region_groups <- c("nelme_regions", "gmri_sst_focal_areas", "lme", "nmfs_trawl_regions")
region_lookup <- map(region_groups, function(region_group){
    mask_details <- get_timeseries_paths(region_group) }) %>% 
  setNames(region_groups)

# Grab a couple to use from each as contenders


# NMFS Trawl Regions
trawl_gb  <- read_sf(region_lookup$nmfs_trawl_regions$georges_bank$shape_path) 
trawl_gom <- read_sf(region_lookup$nmfs_trawl_regions$gulf_of_maine$shape_path)
trawl_mab <- read_sf(region_lookup$nmfs_trawl_regions$mid_atlantic_bight$shape_path)
trawl_sne <- read_sf(region_lookup$nmfs_trawl_regions$southern_new_england$shape_path)


# Load all the strata and just filter out the crap ones
trawl_full <- read_sf(str_c(res_path, "Shapefiles/BottomTrawlStrata/BTS_Strata.shp"))  %>% 
  clean_names() %>% 
  filter(strata >= 01010 ,
         strata <= 01760,
         strata != 1310,
         strata != 1320,
         strata != 1330,
         strata != 1350,
         strata != 1410,
         strata != 1420,
         strata != 1490) 


# DFO Data
dfo_path <- shared.path(group = "Mills Lab", folder = "Projects/DFO_survey_data/strata_shapefiles")
dfo_area <- read_sf(str_c(dfo_path, "MaritimesRegionEcosystemAssessmentBoundary.shp"))


# set overall zoom for maps
xlimz <- c(-76, -57)
ylimz <- c(35, 48)

# base map
base_map <- ggplot() +
  geom_sf(data = new_england) +
  geom_sf(data = canada) +
  geom_sf(data = greenland) +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5)) +
  labs(color = "", 
       fill = "")

NMFS Trawl Areas

# Build off of base map
base_map +
  # geom_sf(data = trawl_full, aes(fill = "Trawl Survey Area")) +
  geom_sf(data = st_union(trawl_full), aes(fill = "Trawl Survey Area"), alpha = 0.2) +
  geom_sf(data = st_union(trawl_gom), aes(fill = "Gulf of Maine Survey Area"), alpha = 0.2) +
  coord_sf(xlim = xlimz, ylim = ylimz) +
  labs(title = "NMFS Trawl Region")

DFO Survey Areas

# Map dfo data 
base_map + 
  geom_sf(data = dfo_area, aes(fill = "DFO Ecosystems Assesment Boundary"), alpha = 0.2) +
  coord_sf(xlim = xlimz, ylim = ylimz) +
  labs(title = "DFO Region")

All Areas

base_map +
  geom_sf(data = st_union(trawl_full), aes(fill = "Trawl Survey Area"), alpha = 0.2) +
  geom_sf(data = st_union(trawl_gom), aes(fill = "Gulf of Maine Survey Area"), alpha = 0.2) +
  geom_sf(data = dfo_area, aes(fill = "DFO Ecosystems Assesment Boundary"), alpha = 0.2) +
  coord_sf(xlim = xlimz, ylim = ylimz) +
  labs(title = "All Areas")

SODA

# SODA vars
soda_ssal  <- stack(str_c(soda_path, "SODA_Salt_Red.nc"))
soda_bsal  <- stack(str_c(soda_path, "SODA_Salt_Red_bottomLayer.nc"))
soda_btemp <- stack(str_c(soda_path, "SODA_Temp_Red_bottomLayer.nc"))

OISSTv2

# OISST 
data_window <- data.frame(lon = c(-72, -61),
                          lat = c(39, 46.1),
                          time = as.Date(c("1981-09-01", "2020-12-31")))

# Load Data
oisst <- oisst_window_load(oisst_path = oisst_path, 
                           data_window = data_window, 
                           anomalies = FALSE)


# Convert to Monthly
oisst <- stack(oisst)

# build a key for months using the layer names
month_key <- seq.Date(min(data_window$time), max(data_window$time), by = "month")
month_key <- str_c("X", month_key) %>% str_replace_all("-", ".")
month_key <- str_sub(month_key, 1, -4)

# 
oisst_monthly <- map(month_key, function(wut_month){
  
  # Identify corresponding days in month
  which_layers  <- str_detect(names(oisst), wut_month)
  layer_indices <- which(which_layers)
  
  if(length(layer_indices) == 0) { print(str_c(length(layer_indices), " days for ", wut_month))  }
  
  # Get mean
  month_mean <- calc(oisst[[layer_indices]], fun = mean, na.rm = T)

  # set month as name
  month_mean <- setNames(month_mean, wut_month)
  return(month_mean)
  
  
}) 

# Crop them
oisst_monthly <- stack(oisst_monthly)

# clean up
rm(oisst)

Cropping Functions

####  Processing Functions
# Masking function for an area
mask_shape <- function(in_ras, in_mask){
  r1 <- crop(x = in_ras, y = in_mask)
  r2 <- mask(x = r1, mask = in_mask)
  return(r2)}

# Function to turn it into a dataframe using cellstats
timeseries_from_mask <- function(ras_in, var_name){
  ts <- cellStats(ras_in, mean, na.rm = T)
  ts <- ts %>% 
    as.data.frame() %>% 
    rownames_to_column() %>% 
    setNames(c("date", var_name)) %>% 
    mutate(date = str_remove(date, "X"),
           date = str_replace_all(date, "[.]", "-"))
  
  date_len <- str_length(ts$date[1])
  if(date_len == 7){
    ts <- ts %>% 
      mutate(date = str_c(date, "-15"),
             date = as.Date(date))
  } else{
    ts <- mutate(ts, date = as.Date(date))
  }
  
  return(ts)
}

Processing Observed Variables

# Put observed variables in a list
observed_vars <- list(
  bot_temp  = soda_btemp,
  bot_sal   = soda_bsal,
  surf_sal  = soda_ssal,
  surf_temp = oisst_monthly)

# Now mask them all and get timeseries
masked_vars <- imap(observed_vars, function(masking_var, var_name){
  
  # Mask and get timeseries
  masked_gom   <- mask_shape(masking_var, trawl_gom)
  masked_gom   <- timeseries_from_mask(masked_gom, var_name)
  masked_trawl <- mask_shape(masking_var, trawl_full)
  masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
  masked_dfo   <- mask_shape(masking_var, dfo_area)
  masked_dfo   <- timeseries_from_mask(masked_dfo, var_name)
    
  # Put in list
  list(
    nefsc_gom  = masked_gom,
    nefsc_full = masked_trawl,
    dfo_full   = masked_dfo)
  
})


# Get Time Periods to SODA/OISST
soda_years  <- unique(str_sub(names(soda_ssal), 2, 5))
oisst_years <- unique(str_sub(names(oisst_monthly), 2, 5))


# clean up
rm(soda_bsal, soda_btemp, soda_ssal, oisst_monthly)

CMIP6 Bias-Corrected

Mean

# CMIP Bias Corrected
cmip_stemp <- stack(str_c(cmip_path, "BiasCorrected/surf_temp_OISST_bias_corrected_mean.grd"))
cmip_btemp <- stack(str_c(cmip_path, "BiasCorrected/bot_temp_SODA_bias_corrected_mean.grd"))
cmip_ssal  <- stack(str_c(cmip_path, "BiasCorrected/surf_sal_SODA_bias_corrected_mean.grd"))
cmip_bsal  <- stack(str_c(cmip_path, "BiasCorrected/bot_sal_SODA_bias_corrected_mean.grd"))


# Cmip matches to SODA/OISST Time Periods
soda_index  <- which(str_sub(names(cmip_bsal), 2, 5) %in% soda_years)
oisst_index <- which(str_sub(names(cmip_stemp), 2, 5) %in% oisst_years)


# Put them in a list
cmip_vars <- list(
  bot_temp  = cmip_btemp,#[[soda_index]],
  bot_sal   = cmip_bsal,#[[soda_index]],
  surf_sal  = cmip_ssal,#[[soda_index]],
  surf_temp = cmip_stemp#[[oisst_index]]
  )

# Rotate them:
cmip_vars <- map(cmip_vars, raster::rotate)



# Now mask them all and get timeseries
masked_cmip <- imap(cmip_vars, function(masking_var, var_name){
  
  # Mask and get timeseries
  masked_gom   <- mask_shape(masking_var, trawl_gom)
  masked_gom   <- timeseries_from_mask(masked_gom, var_name)
  masked_trawl <- mask_shape(masking_var, trawl_full)
  masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
  masked_dfo   <- mask_shape(masking_var, dfo_area)
  masked_dfo   <- timeseries_from_mask(masked_dfo, var_name)
    
  # put in list
  list(
    nefsc_gom  = masked_gom,
    nefsc_full = masked_trawl,
    dfo_full   = masked_dfo
  )
  
})

5th Percentile

# CMIP Bias Corrected
cmip_stemp_5 <- stack(str_c(cmip_path, "BiasCorrected/surf_temp_OISST_bias_corrected_5thpercentile.grd"))
cmip_btemp_5 <- stack(str_c(cmip_path, "BiasCorrected/bot_temp_SODA_bias_corrected_5thpercentile.grd"))
cmip_ssal_5  <- stack(str_c(cmip_path, "BiasCorrected/surf_sal_SODA_bias_corrected_5thpercentile.grd"))
cmip_bsal_5  <- stack(str_c(cmip_path, "BiasCorrected/bot_sal_SODA_bias_corrected_5thpercentile.grd"))


# Put them in a list
cmip_vars_5 <- list(
  bot_temp  = cmip_btemp_5,#[[soda_index]],
  bot_sal   = cmip_bsal_5,#[[soda_index]],
  surf_sal  = cmip_ssal_5,#[[soda_index]],
  surf_temp = cmip_stemp_5#[[oisst_index]]
  )

# Rotate them:
cmip_vars_5 <- map(cmip_vars_5, raster::rotate)

# Now mask them all and get timeseries
masked_cmip_5 <- imap(cmip_vars_5, function(masking_var, var_name){
  
  # Mask and get timeseries
  masked_gom   <- mask_shape(masking_var, trawl_gom)
  masked_gom   <- timeseries_from_mask(masked_gom, var_name)
  masked_trawl <- mask_shape(masking_var, trawl_full)
  masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
  masked_dfo   <- mask_shape(masking_var, dfo_area)
  masked_dfo   <- timeseries_from_mask(masked_dfo, var_name)
    
  # put in list
  list(
    nefsc_gom  = masked_gom,
    nefsc_full = masked_trawl,
    dfo_full   = masked_dfo
  )
  
})

95th Percentile

# CMIP Bias Corrected
cmip_stemp_95 <- stack(str_c(cmip_path, "BiasCorrected/surf_temp_OISST_bias_corrected_95thpercentile.grd"))
cmip_btemp_95 <- stack(str_c(cmip_path, "BiasCorrected/bot_temp_SODA_bias_corrected_95thpercentile.grd"))
cmip_ssal_95  <- stack(str_c(cmip_path, "BiasCorrected/surf_sal_SODA_bias_corrected_95thpercentile.grd"))
cmip_bsal_95  <- stack(str_c(cmip_path, "BiasCorrected/bot_sal_SODA_bias_corrected_95thpercentile.grd"))


# Put them in a list
cmip_vars_95 <- list(
  bot_temp  = cmip_btemp_95,#[[soda_index]],
  bot_sal   = cmip_bsal_95,#[[soda_index]],
  surf_sal  = cmip_ssal_95,#[[soda_index]],
  surf_temp = cmip_stemp_95#[[oisst_index]]
  )

# Rotate them:
cmip_vars_95 <- map(cmip_vars_95, raster::rotate)


# Now mask them all and get timeseries
masked_cmip_95 <- imap(cmip_vars_95, function(masking_var, var_name){
  
  # Mask and get timeseries
  masked_gom   <- mask_shape(masking_var, trawl_gom)
  masked_gom   <- timeseries_from_mask(masked_gom, var_name)
  masked_trawl <- mask_shape(masking_var, trawl_full)
  masked_trawl <- timeseries_from_mask(masked_trawl,var_name)
  masked_dfo   <- mask_shape(masking_var, dfo_area)
  masked_dfo   <- timeseries_from_mask(masked_dfo, var_name)
    
  # put in list
  list(
    nefsc_gom  = masked_gom,
    nefsc_full = masked_trawl,
    dfo_full   = masked_dfo
  )
  
})

Validating Seasonality

comparison_plot <- function(var_option, mask_option){
  
  obs_data  <- masked_vars[[var_option]][[mask_option]]
  cmip_mean <- masked_cmip[[var_option]][[mask_option]]
  cmip_5    <- masked_cmip_5[[var_option]][[mask_option]]
  cmip_95   <- masked_cmip_95[[var_option]][[mask_option]]
  
  # Add data source
  cmip_mean$data_source <- "CMIP Mean"
  cmip_5$data_source    <- "CMIP 5th Perc."
  cmip_95$data_source   <- "CMIP 95th Perc."
  
  # # Format dates  
  # cmip_mean$date <- as.Date(str_c(cmip_mean$date, "-15"))
  # cmip_5$date    <- as.Date(str_c(cmip_5$date, "-15"))
  # cmip_95$date   <- as.Date(str_c(cmip_95$date, "-15"))
  # 
  # # Separate action for oisst/soda
  # if(var_option == "surf_temp"){
  #   obs_data$date <- as.Date(str_c(obs_data$date, "-15"))
  #   obs_data$data_source <- "OISSTv2"
  # 
  #   } else{
  #   obs_data$date <- as.Date(obs_data$date)
  #   obs_data$data_source <- "SODA"
  #   }
  
    # Separate action for labeling oisst/soda
  if(var_option == "surf_temp"){
    obs_data$data_source <- "OISSTv2"

    } else{
   obs_data$data_source <- "SODA"
    }
  
  
  # Filter Dates to focus on shared timeperiod
  cmip_mean <- filter(cmip_mean, between(date, min(obs_data$date), max(obs_data$date)))
  cmip_5 <- filter(cmip_5, between(date, min(obs_data$date), max(obs_data$date)))
  cmip_95 <- filter(cmip_95, between(date, min(obs_data$date), max(obs_data$date)))
  
  
  # format variable for display on axis
  var_label <- str_replace(var_option, "_", " ")
  var_label <- str_to_title(var_label)
  
  ggplot() +
     geom_line(data = obs_data, aes_string("date", var_option, color = "data_source")) +
     geom_line(data = cmip_mean, aes_string("date", var_option, color = "data_source")) +
     geom_line(data = cmip_5, aes_string("date", var_option, color = "data_source")) +
     geom_line(data = cmip_95, aes_string("date", var_option, color = "data_source")) +
     scale_color_gmri() +
     labs(x = "", y = var_label, color = "") +
     facet_zoom(xlim =c(as.Date("2000-01-01"), as.Date("2002-12-31")), zoom.size = .5) +
    theme(legend.position = "bottom")
  
}

Bottom Temperature

var_option <- "bot_temp"

Gulf of Maine

mask_option <- "nefsc_gom"

comparison_plot(var_option = var_option, mask_option = mask_option)

NMFS Survey Area

mask_option <- "nefsc_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

DFO Survey Area

mask_option <- "dfo_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

Surface Temperature

var_option = "surf_temp"

Gulf of Maine

mask_option <- "nefsc_gom"

comparison_plot(var_option = var_option, mask_option = mask_option)

NMFS Survey Area

mask_option <- "nefsc_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

DFO Survey Area

mask_option <- "dfo_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

Bottom Salinity

var_option <- "bot_sal"

Gulf of Maine

mask_option <- "nefsc_gom"

comparison_plot(var_option = var_option, mask_option = mask_option)

NMFS Survey Area

mask_option <- "nefsc_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

DFO Survey Area

mask_option <- "dfo_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

Surface Salinity

var_option <- "surf_sal"

Gulf of Maine

mask_option <- "nefsc_gom"

comparison_plot(var_option = var_option, mask_option = mask_option)

NMFS Survey Area

mask_option <- "nefsc_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

DFO Survey Area

mask_option <- "dfo_full"

comparison_plot(var_option = var_option, mask_option = mask_option)

Regional Trajectories

# Function to Plot Single Variable Trajectories for the Different Regions
clim_futures <- function(var_option){
  
  # Subset the variables
  obs_data  <- masked_vars[[var_option]]
  cmip_mean <- masked_cmip[[var_option]]
  cmip_5    <- masked_cmip_5[[var_option]]
  cmip_95   <- masked_cmip_95[[var_option]]
  
  # Label the sources for the CMIP Data, process areas
  
  obs_data  <- map_dfr(obs_data, ~ mutate(.x, data_source = "Reference Data"), .id = "area")
  cmip_mean <- map_dfr(cmip_mean, ~ mutate(.x, data_source = "CMIP Mean"), .id = "area")
  cmip_5    <- map_dfr(cmip_5, ~ mutate(.x, data_source = "CMIP 5th Perc."), .id = "area")
  cmip_95   <- map_dfr(cmip_95, ~ mutate(.x, data_source = "CMIP 95th Perc."), .id = "area")
  
  # combine the different quantiles
  cmip_all <- bind_rows(list(obs_data, cmip_mean, cmip_5, cmip_95)) %>% 
    mutate(data_source = factor(data_source, 
                                levels = c("Reference Data", "CMIP 5th Perc.", 
                                           "CMIP Mean", "CMIP 95th Perc.")),
           area = str_replace_all(area, "_", " "),
           area = str_to_title(area),
           area = str_replace(area, "Dfo", "DFO"),
           area = str_replace(area, "Nefsc", "NEFSC"),
           area = factor(area, levels = c("DFO Full", "NEFSC Gom", "NEFSC Full")),
           yr = lubridate::year(date),
           month = lubridate::month(date)) 
  
  
  #group on year and get annual averayges
  var_sym <- sym(var_option)
  cmip_all <- cmip_all %>% 
    group_by(yr, area, data_source) %>% 
    summarise({{ var_sym }} := mean({{ var_sym }}, na.rm = T),
              .groups = "keep") %>% 
    ungroup() %>% 
    mutate(yr = as.numeric(as.character(yr))) %>% 
    filter(yr >= 1982)
  
  
  # format variable for display on axis
  var_titles <- c(
    "bot_temp" = expression("Bottom Temperature"~degree~"C"),
    "surf_temp" = expression("Surface Temperature"~degree~"C"),
    "bot_sal" = "Bottom Salinity",
    "surf_sal" = "Surface Salinity")
  var_label <- var_titles[var_option]
  
  
  
  #pivot the sources wider so we can single them out asier
  data_wide <- cmip_all %>% 
    pivot_wider(names_from = data_source, values_from = {{var_option}})
  
  
  # Build plot
  regional_comparison_plot <-  ggplot(data_wide, aes(x = yr)) +
      geom_ribbon(aes(ymin = `CMIP 5th Perc.`, 
                      ymax = `CMIP 95th Perc.`),
                  fill = "gray80") +
      geom_line(aes(y = `CMIP Mean`, 
                    color = "CMIP Mean")) +
      geom_line(aes(y = `Reference Data`, color = "Reference Data")) +
      geom_point(aes(y = `Reference Data`, color = "Reference Data"),
                 size = 0.25) +
      scale_color_manual(values = c(
        "Reference Data" = "gray10", 
        "CMIP Mean"          = "gray50")) +
      facet_wrap(~area, nrow = 3) +
      labs(x = "", y = var_label, color = "") +
      theme(legend.position = "bottom")
  
  
  return(regional_comparison_plot)
    
  
  
}

Bottom Temperature

clim_futures(var_option = "bot_temp")

Surface Temperature

clim_futures(var_option = "surf_temp")

Bottom Salinity

clim_futures(var_option = "bot_sal")

Surface Salinity

clim_futures(var_option = "surf_sal")